{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 模型结构\n",
    "\n",
    "- 两层单向LSTM, 输出序列结果, 即(batch_size, step_size, feature_size)\n",
    "- 分别输入到1, 2, 3, 4, 5, 6共6个不同长度的卷积层中\n",
    "- 卷积层为单层\n",
    "- 对于每个问题, 将所有卷积核结果并起来\n",
    "- 将两个问题并起来的结果, 分别[相减并取绝对值], [x]\n",
    "\n",
    "## 训练技巧\n",
    "\n",
    "- 首先正常训练一定的epoch, 使用Adam方法\n",
    "- 待loss降到一定水平后, 开放embedding参数的训练, 继续使用Adam方法训练, 并加入学习率衰减callback # (效果不好, dev loss降不下去)\n",
    "- 待loss降到比较低的水平后, 改用SGD方法进行训练, 直至结束 # (貌似不可行, 取消)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from glob import glob\n",
    "from datetime import datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "((10001, 300), (3049, 300))"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "DATA_PATH = \"./data/\"\n",
    "TRAIN_PATH = DATA_PATH + \"train.csv\"\n",
    "TEST_PATH = DATA_PATH + \"test.csv\"\n",
    "WORD_EMBED_PATH = DATA_PATH + \"word_embed.txt\"\n",
    "CHAR_EMBED_PATH = DATA_PATH + \"char_embed.txt\"\n",
    "QUEST_PATH = DATA_PATH + \"question.csv\"\n",
    "\n",
    "train_data = pd.read_csv(TRAIN_PATH)\n",
    "test_data = pd.read_csv(TEST_PATH)\n",
    "question_data = pd.read_csv(QUEST_PATH)\n",
    "word_embedding_data = pd.read_csv(WORD_EMBED_PATH, delimiter=\" \", header=None, index_col=0)\n",
    "char_embedding_data = pd.read_csv(CHAR_EMBED_PATH, delimiter=\" \", header=None, index_col=0)\n",
    "\n",
    "question_data[\"words\"] = question_data[\"words\"].str.split(\" \")\n",
    "question_data[\"chars\"] = question_data[\"chars\"].str.split(\" \")\n",
    "\n",
    "label = train_data[\"label\"].values\n",
    "\n",
    "from keras.preprocessing.text import Tokenizer\n",
    "\n",
    "MAX_COUNT = 10000\n",
    "\n",
    "word_tokenizer = Tokenizer(MAX_COUNT)\n",
    "word_tokenizer.fit_on_texts(question_data[\"words\"])\n",
    "\n",
    "word_embedding_data = np.concatenate(\n",
    "    (\n",
    "        np.zeros(shape=(1, word_embedding_data.shape[1]), dtype=np.float64),\n",
    "        word_embedding_data.loc[list(word_tokenizer.word_index.keys())[:MAX_COUNT]].values\n",
    "    ),\n",
    "    axis=0\n",
    ")\n",
    "\n",
    "char_tokenizer = Tokenizer(MAX_COUNT)\n",
    "char_tokenizer.fit_on_texts(question_data[\"chars\"])\n",
    "\n",
    "char_embedding_data = np.concatenate(\n",
    "    (\n",
    "        np.zeros(shape=(1, char_embedding_data.shape[1]), dtype=np.float64),\n",
    "        char_embedding_data.loc[list(char_tokenizer.word_index.keys())[:MAX_COUNT]].values\n",
    "    ),\n",
    "    axis=0\n",
    ")\n",
    "\n",
    "word_embedding_data.shape, char_embedding_data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((254386, 30),\n",
       " (254386, 30),\n",
       " (172956, 30),\n",
       " (172956, 30),\n",
       " (254386, 30),\n",
       " (254386, 30),\n",
       " (172956, 30),\n",
       " (172956, 30))"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from keras.preprocessing.sequence import pad_sequences\n",
    "\n",
    "SEQ_LEN = 30\n",
    "\n",
    "def gen_word_data(data):\n",
    "    seq_word1 = word_tokenizer.texts_to_sequences(data.merge(question_data, how=\"left\", left_on=\"q1\", right_on=\"qid\")[\"words\"])\n",
    "    seq_word2 = word_tokenizer.texts_to_sequences(data.merge(question_data, how=\"left\", left_on=\"q2\", right_on=\"qid\")[\"words\"])\n",
    "    return pad_sequences(seq_word1, maxlen=SEQ_LEN, padding=\"pre\",truncating=\"pre\"), \\\n",
    "        pad_sequences(seq_word2, maxlen=SEQ_LEN, padding=\"pre\",truncating=\"pre\")\n",
    "    \n",
    "def gen_char_data(data):\n",
    "    seq_char1 = char_tokenizer.texts_to_sequences(data.merge(question_data, how=\"left\", left_on=\"q1\", right_on=\"qid\")[\"chars\"])\n",
    "    seq_char2 = char_tokenizer.texts_to_sequences(data.merge(question_data, how=\"left\", left_on=\"q2\", right_on=\"qid\")[\"chars\"])\n",
    "    return pad_sequences(seq_char1, maxlen=SEQ_LEN, padding=\"pre\",truncating=\"pre\"), \\\n",
    "        pad_sequences(seq_char2, maxlen=SEQ_LEN, padding=\"pre\",truncating=\"pre\")\n",
    "\n",
    "word1, word2 = gen_word_data(train_data)\n",
    "char1, char2 = gen_char_data(train_data)\n",
    "test_word1, test_word2 = gen_word_data(test_data)\n",
    "test_char1, test_char2 = gen_char_data(test_data)\n",
    "\n",
    "word1.shape, word2.shape, test_word1.shape, test_word2.shape, char1.shape, char2.shape, test_char1.shape, test_char2.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from keras.models import Model\n",
    "from keras.layers.merge import concatenate\n",
    "from keras.optimizers import Adam, SGD, Nadam\n",
    "from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau\n",
    "from keras.layers import LSTM, Bidirectional, TimeDistributed\n",
    "from keras.layers import Conv1D, MaxPool1D, GlobalAveragePooling1D\n",
    "from keras.layers import Input, Embedding, Dropout, BatchNormalization, Dense, Flatten, Lambda, K"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# general\n",
    "NUM_EPOCHES = 30\n",
    "EPOCHES1 = 5\n",
    "EPOCHES2 = 25 # 5\n",
    "EPOCHES3 = 22\n",
    "BATCH_SIZE = 1024\n",
    "DROP_RATE = 0.3\n",
    "\n",
    "# cnn\n",
    "CONV_LEN1 = 128\n",
    "CONV_LEN2 = 128\n",
    "CONV_LEN3 = 128\n",
    "CONV_LEN4 = 128\n",
    "CONV_LEN5 = 128\n",
    "CONV_LEN6 = 128\n",
    "CONV_LEN = CONV_LEN1 + CONV_LEN2 + CONV_LEN3 + CONV_LEN4 + CONV_LEN5 + CONV_LEN6\n",
    "\n",
    "# lstm\n",
    "LSTM_SIZE1 = 256\n",
    "LSTM_SIZE2 = 256\n",
    "LSTM_DROP_RATE = 0.3\n",
    "\n",
    "# dense\n",
    "DENSE_SIZE1 = 512\n",
    "DENSE_SIZE2 = 256"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def cnn_layer1(inputa, inputb, filters, kernel_size):\n",
    "    conv = Conv1D(filters=filters, kernel_size=kernel_size, padding=\"same\", activation=\"relu\")\n",
    "    conv_outputa = conv(inputa)\n",
    "    conv_outputa = GlobalAveragePooling1D()(conv_outputa)\n",
    "    conv_outputb = conv(inputb)\n",
    "    conv_outputb = GlobalAveragePooling1D()(conv_outputb)\n",
    "    return conv_outputa, conv_outputb\n",
    "    \n",
    "def cnn_layer2(inputa, inputb, filters, kernel_size):\n",
    "    conv = Conv1D(filters=filters, kernel_size=kernel_size, padding=\"same\", activation=\"relu\")\n",
    "    conv_outputa = conv(inputa)\n",
    "    conv_outputa = MaxPool1D(pool_size=SEQ_LEN)(conv_outputa)\n",
    "    conv_outputa = Flatten()(conv_outputa)\n",
    "    conv_outputb = conv(inputb)\n",
    "    conv_outputb = MaxPool1D(pool_size=SEQ_LEN)(conv_outputb)\n",
    "    conv_outputb = Flatten()(conv_outputb)\n",
    "    return conv_outputa, conv_outputb\n",
    "\n",
    "def cnn_layer3(inputa, inputb, filters, kernel_size):\n",
    "    conv = Conv1D(filters=filters, kernel_size=kernel_size, padding=\"same\", activation=\"relu\")\n",
    "    \n",
    "    conv_outputa = conv(inputa)\n",
    "    conv_outputa1 = Flatten()(MaxPool1D(pool_size=SEQ_LEN)(conv_outputa))\n",
    "    conv_outputa2 = GlobalAveragePooling1D()(conv_outputa)\n",
    "    conv_outputa = concatenate([conv_outputa1, conv_outputa2])\n",
    "    \n",
    "    conv_outputb = conv(inputb)\n",
    "    conv_outputb1 = Flatten()(MaxPool1D(pool_size=SEQ_LEN)(conv_outputb))\n",
    "    conv_outputb2 = GlobalAveragePooling1D()(conv_outputb)\n",
    "    conv_outputb = concatenate([conv_outputb1, conv_outputb2])\n",
    "    \n",
    "    return conv_outputa, conv_outputb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# WORDS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From C:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow\\python\\util\\deprecation.py:497: calling conv1d (from tensorflow.python.ops.nn_ops) with data_format=NHWC is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "`NHWC` for data_format is deprecated, use `NWC` instead\n",
      "Train on 228946 samples, validate on 25440 samples\n",
      "Epoch 1/5\n",
      "228946/228946 [==============================] - 169s 738us/step - loss: 0.4468 - acc: 0.7899 - val_loss: 0.3382 - val_acc: 0.8574\n",
      "Epoch 2/5\n",
      "228946/228946 [==============================] - 160s 697us/step - loss: 0.3121 - acc: 0.8629 - val_loss: 0.2594 - val_acc: 0.8870\n",
      "Epoch 3/5\n",
      "228946/228946 [==============================] - 166s 726us/step - loss: 0.2687 - acc: 0.8839 - val_loss: 0.2528 - val_acc: 0.8978\n",
      "Epoch 4/5\n",
      "228946/228946 [==============================] - 166s 724us/step - loss: 0.2414 - acc: 0.8968 - val_loss: 0.2279 - val_acc: 0.9031\n",
      "Epoch 5/5\n",
      "228946/228946 [==============================] - 167s 730us/step - loss: 0.2240 - acc: 0.9047 - val_loss: 0.2136 - val_acc: 0.9123\n",
      "Train on 228946 samples, validate on 25440 samples\n",
      "Epoch 1/25\n",
      "228946/228946 [==============================] - 178s 776us/step - loss: 0.2172 - acc: 0.9079 - val_loss: 0.2190 - val_acc: 0.9129\n",
      "Epoch 2/25\n",
      "228946/228946 [==============================] - 173s 755us/step - loss: 0.2038 - acc: 0.9146 - val_loss: 0.2058 - val_acc: 0.9172\n",
      "Epoch 3/25\n",
      "228946/228946 [==============================] - 176s 770us/step - loss: 0.1922 - acc: 0.9196 - val_loss: 0.2084 - val_acc: 0.9175\n",
      "Epoch 4/25\n",
      "228946/228946 [==============================] - 175s 764us/step - loss: 0.1825 - acc: 0.9239 - val_loss: 0.2094 - val_acc: 0.9196\n",
      "Epoch 5/25\n",
      "228946/228946 [==============================] - 176s 770us/step - loss: 0.1746 - acc: 0.9277 - val_loss: 0.1939 - val_acc: 0.9237\n",
      "Epoch 6/25\n",
      "228946/228946 [==============================] - 176s 769us/step - loss: 0.1684 - acc: 0.9303 - val_loss: 0.1912 - val_acc: 0.9258\n",
      "Epoch 7/25\n",
      "228946/228946 [==============================] - 175s 766us/step - loss: 0.1616 - acc: 0.9342 - val_loss: 0.1897 - val_acc: 0.9267\n",
      "Epoch 8/25\n",
      "228946/228946 [==============================] - 172s 752us/step - loss: 0.1570 - acc: 0.9352 - val_loss: 0.1979 - val_acc: 0.9271\n",
      "Epoch 9/25\n",
      "228946/228946 [==============================] - 178s 779us/step - loss: 0.1525 - acc: 0.9376 - val_loss: 0.1896 - val_acc: 0.9278\n",
      "Epoch 10/25\n",
      "228946/228946 [==============================] - 178s 776us/step - loss: 0.1482 - acc: 0.9391 - val_loss: 0.2019 - val_acc: 0.9228\n",
      "Epoch 11/25\n",
      "228946/228946 [==============================] - 178s 775us/step - loss: 0.1446 - acc: 0.9411 - val_loss: 0.1884 - val_acc: 0.9289\n",
      "Epoch 12/25\n",
      "228946/228946 [==============================] - 176s 771us/step - loss: 0.1411 - acc: 0.9422 - val_loss: 0.1884 - val_acc: 0.9308\n",
      "Epoch 13/25\n",
      "228946/228946 [==============================] - 176s 768us/step - loss: 0.1387 - acc: 0.9427 - val_loss: 0.1833 - val_acc: 0.9323\n",
      "Epoch 14/25\n",
      "228946/228946 [==============================] - 176s 771us/step - loss: 0.1354 - acc: 0.9442 - val_loss: 0.1861 - val_acc: 0.9309\n",
      "Epoch 15/25\n",
      "228946/228946 [==============================] - 176s 771us/step - loss: 0.1327 - acc: 0.9455 - val_loss: 0.1982 - val_acc: 0.9292\n",
      "Epoch 16/25\n",
      "228946/228946 [==============================] - 177s 774us/step - loss: 0.1305 - acc: 0.9463 - val_loss: 0.2033 - val_acc: 0.9267\n",
      "Epoch 17/25\n",
      "159744/228946 [===================>..........] - ETA: 52s - loss: 0.1273 - acc: 0.9476"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-7-9eaeb7594bf4>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     97\u001b[0m         \u001b[0mvalidation_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mdev_x1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdev_x2\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdev_y\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     98\u001b[0m         \u001b[0mshuffle\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 99\u001b[1;33m         \u001b[0mcallbacks\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mearly_stopping\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlr_reducer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcheck_point\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    100\u001b[0m     )\n\u001b[0;32m    101\u001b[0m \u001b[1;31m#     # 第三次训练\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\keras\\engine\\training.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)\u001b[0m\n\u001b[0;32m   1040\u001b[0m                                         \u001b[0minitial_epoch\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0minitial_epoch\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1041\u001b[0m                                         \u001b[0msteps_per_epoch\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msteps_per_epoch\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1042\u001b[1;33m                                         validation_steps=validation_steps)\n\u001b[0m\u001b[0;32m   1043\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1044\u001b[0m     def evaluate(self, x=None, y=None,\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\keras\\engine\\training_arrays.py\u001b[0m in \u001b[0;36mfit_loop\u001b[1;34m(model, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)\u001b[0m\n\u001b[0;32m    197\u001b[0m                     \u001b[0mins_batch\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mins_batch\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtoarray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    198\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 199\u001b[1;33m                 \u001b[0mouts\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mins_batch\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    200\u001b[0m                 \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mouts\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    201\u001b[0m                     \u001b[0mouts\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mouts\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, inputs)\u001b[0m\n\u001b[0;32m   2665\u001b[0m                     \u001b[1;34m'In order to feed symbolic tensors to a Keras model '\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2666\u001b[0m                     'in TensorFlow, you need tensorflow 1.8 or higher.')\n\u001b[1;32m-> 2667\u001b[1;33m             \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_legacy_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   2668\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2669\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py\u001b[0m in \u001b[0;36m_legacy_call\u001b[1;34m(self, inputs)\u001b[0m\n\u001b[0;32m   2647\u001b[0m         \u001b[0msession\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_session\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2648\u001b[0m         updated = session.run(fetches=fetches, feed_dict=feed_dict,\n\u001b[1;32m-> 2649\u001b[1;33m                               **self.session_kwargs)\n\u001b[0m\u001b[0;32m   2650\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[0mupdated\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moutputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2651\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow\\python\\client\\session.py\u001b[0m in \u001b[0;36mrun\u001b[1;34m(self, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[0;32m    903\u001b[0m     \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    904\u001b[0m       result = self._run(None, fetches, feed_dict, options_ptr,\n\u001b[1;32m--> 905\u001b[1;33m                          run_metadata_ptr)\n\u001b[0m\u001b[0;32m    906\u001b[0m       \u001b[1;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    907\u001b[0m         \u001b[0mproto_data\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow\\python\\client\\session.py\u001b[0m in \u001b[0;36m_run\u001b[1;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[0;32m   1135\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mfinal_fetches\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mfinal_targets\u001b[0m \u001b[1;32mor\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mhandle\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mfeed_dict_tensor\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1136\u001b[0m       results = self._do_run(handle, final_targets, final_fetches,\n\u001b[1;32m-> 1137\u001b[1;33m                              feed_dict_tensor, options, run_metadata)\n\u001b[0m\u001b[0;32m   1138\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1139\u001b[0m       \u001b[0mresults\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow\\python\\client\\session.py\u001b[0m in \u001b[0;36m_do_run\u001b[1;34m(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)\u001b[0m\n\u001b[0;32m   1353\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mhandle\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1354\u001b[0m       return self._do_call(_run_fn, self._session, feeds, fetches, targets,\n\u001b[1;32m-> 1355\u001b[1;33m                            options, run_metadata)\n\u001b[0m\u001b[0;32m   1356\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1357\u001b[0m       \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_do_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_prun_fn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfeeds\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfetches\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow\\python\\client\\session.py\u001b[0m in \u001b[0;36m_do_call\u001b[1;34m(self, fn, *args)\u001b[0m\n\u001b[0;32m   1359\u001b[0m   \u001b[1;32mdef\u001b[0m \u001b[0m_do_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1360\u001b[0m     \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1361\u001b[1;33m       \u001b[1;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1362\u001b[0m     \u001b[1;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1363\u001b[0m       \u001b[0mmessage\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcompat\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow\\python\\client\\session.py\u001b[0m in \u001b[0;36m_run_fn\u001b[1;34m(session, feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[0;32m   1338\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1339\u001b[0m           return tf_session.TF_Run(session, options, feed_dict, fetch_list,\n\u001b[1;32m-> 1340\u001b[1;33m                                    target_list, status, run_metadata)\n\u001b[0m\u001b[0;32m   1341\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1342\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_prun_fn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msession\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import StratifiedKFold\n",
    "\n",
    "best_results = []\n",
    "last_results = []\n",
    "best_file_names = []\n",
    "\n",
    "for i, (train_index, dev_index) in enumerate(StratifiedKFold(n_splits=10).split(X=word1, y=label)):  # word/char switch\n",
    "    train_x1, train_x2, train_y = word1[train_index, :], word2[train_index, :], label[train_index]  # word/char switch\n",
    "    dev_x1, dev_x2, dev_y = word1[dev_index, :], word2[dev_index, :], label[dev_index]  # word/char switch\n",
    "    \n",
    "    input1 = Input(shape=(SEQ_LEN,), dtype=\"int32\")\n",
    "    input2 = Input(shape=(SEQ_LEN,), dtype=\"int32\")\n",
    "\n",
    "    embedding_layer = Embedding(\n",
    "        input_dim=word_embedding_data.shape[0],  # word/char switch\n",
    "        output_dim=word_embedding_data.shape[1],  # word/char switch\n",
    "        weights=[word_embedding_data],  # word/char switch\n",
    "        input_length=SEQ_LEN,\n",
    "        trainable=False\n",
    "    )\n",
    "    \n",
    "    vector1 = embedding_layer(input1)\n",
    "    vector2 = embedding_layer(input2)\n",
    "    \n",
    "    lstm_layer1 = LSTM(LSTM_SIZE1, dropout=LSTM_DROP_RATE, recurrent_dropout=LSTM_DROP_RATE, return_sequences=True)\n",
    "    layer1a = lstm_layer1(vector1)\n",
    "    layer1a = Dropout(LSTM_DROP_RATE)(layer1a)\n",
    "    layer1b = lstm_layer1(vector2)\n",
    "    layer1b = Dropout(LSTM_DROP_RATE)(layer1b)\n",
    "\n",
    "    lstm_layer2 = LSTM(LSTM_SIZE2, dropout=LSTM_DROP_RATE, recurrent_dropout=LSTM_DROP_RATE, return_sequences=True)\n",
    "    layer2a = lstm_layer2(layer1a)\n",
    "    layer2b = lstm_layer2(layer1b)\n",
    "#     # 每个序列片拼接对应的原始embedding向量\n",
    "#     layer2a = concatenate([vector1, layer2a])\n",
    "#     layer2b = concatenate([vector2, layer2b])\n",
    "    \n",
    "    conv1a, conv1b = cnn_layer3(layer2a, layer2b, filters=CONV_LEN1, kernel_size=1)\n",
    "    conv2a, conv2b = cnn_layer3(layer2a, layer2b, filters=CONV_LEN2, kernel_size=2)\n",
    "    conv3a, conv3b = cnn_layer3(layer2a, layer2b, filters=CONV_LEN3, kernel_size=3)\n",
    "    conv4a, conv4b = cnn_layer3(layer2a, layer2b, filters=CONV_LEN4, kernel_size=4)\n",
    "    conv5a, conv5b = cnn_layer3(layer2a, layer2b, filters=CONV_LEN5, kernel_size=5)\n",
    "    conv6a, conv6b = cnn_layer3(layer2a, layer2b, filters=CONV_LEN6, kernel_size=6)\n",
    "    \n",
    "    merge_a = concatenate([conv1a, conv2a, conv3a, conv4a, conv5a, conv6a])\n",
    "    merge_b = concatenate([conv1b, conv2b, conv3b, conv4b, conv5b, conv6b])\n",
    "    diff = Lambda(lambda x: K.abs(x[0] - x[1]))([merge_a, merge_b])\n",
    "    mult = Lambda(lambda x: x[0] * x[1])([merge_a, merge_b])\n",
    "    merge = concatenate([diff, mult])\n",
    "    \n",
    "    x = Dropout(DROP_RATE)(merge)\n",
    "    x = BatchNormalization()(x)\n",
    "    x = Dense(DENSE_SIZE1, activation=\"relu\")(x)\n",
    "    x = Dropout(DROP_RATE)(x)\n",
    "    x = BatchNormalization()(x)\n",
    "    x = Dense(DENSE_SIZE2, activation=\"relu\")(x)\n",
    "    x = Dropout(DROP_RATE)(x)\n",
    "    x = BatchNormalization()(x)\n",
    "    pred = Dense(1, activation=\"sigmoid\")(x)\n",
    "    \n",
    "    model = Model(inputs=[input1, input2], outputs=pred)\n",
    "    \n",
    "    # 第一次训练\n",
    "    model.compile(\n",
    "        optimizer=\"nadam\",\n",
    "        loss=\"binary_crossentropy\",\n",
    "        metrics=[\"acc\"]\n",
    "    )\n",
    "    fit_res1 = model.fit(\n",
    "        x=[train_x1, train_x2],\n",
    "        y=train_y,\n",
    "        batch_size=BATCH_SIZE,\n",
    "        epochs=EPOCHES1,\n",
    "        validation_data=([dev_x1, dev_x2], dev_y),\n",
    "        shuffle=True,\n",
    "    )\n",
    "    # 第二次训练\n",
    "    embedding_layer.trainable = True\n",
    "    lr_reducer = ReduceLROnPlateau(factor=0.5, patience=4, min_lr=0.0005)\n",
    "    early_stopping = EarlyStopping(\"val_loss\", patience=8)\n",
    "    check_point = ModelCheckpoint(\n",
    "        \"./log/%s.Multi_LSTM_CNN_v2.word.{epoch:03d}.hdf5\" % (datetime.now().strftime(\"%Y%m%d-%H%M%S\")),  # word/char switch\n",
    "        monitor=\"val_loss\",\n",
    "        save_best_only=True,\n",
    "    )\n",
    "    model.compile(\n",
    "        optimizer=\"nadam\",\n",
    "        loss=\"binary_crossentropy\",\n",
    "        metrics=[\"acc\"]\n",
    "    )\n",
    "    fit_res2 = model.fit(\n",
    "        x=[train_x1, train_x2],\n",
    "        y=train_y,\n",
    "        batch_size=BATCH_SIZE,\n",
    "        epochs=EPOCHES2,\n",
    "        validation_data=([dev_x1, dev_x2], dev_y),\n",
    "        shuffle=True,\n",
    "        callbacks=[early_stopping, lr_reducer, check_point]\n",
    "    )\n",
    "    \n",
    "#     # 第三次训练\n",
    "#     model.compile(\n",
    "#         optimizer=SGD(lr=0.001),\n",
    "#         loss=\"binary_crossentropy\",\n",
    "#         metrics=[\"acc\"]\n",
    "#     )\n",
    "#     fit_res3 = model.fit(\n",
    "#         x=[train_x1, train_x2],\n",
    "#         y=train_y,\n",
    "#         batch_size=BATCH_SIZE,\n",
    "#         epochs=EPOCHES3,\n",
    "#         validation_data=([dev_x1, dev_x2], dev_y),\n",
    "#         shuffle=True,\n",
    "#         callbacks=[early_stopping, lr_reducer, check_point]\n",
    "#     )\n",
    "    \n",
    "    pred_last = model.predict([test_word1, test_word2], batch_size=BATCH_SIZE)  # word/char switch\n",
    "    last_results.append(pd.DataFrame(pred_last, columns=[\"y_pre\"]))\n",
    "    \n",
    "    best_model_file = glob(\"./log/*.hdf5\")[-1].replace(\"\\\\\", \"/\")\n",
    "    best_file_names.append(best_model_file)\n",
    "    print(\"load model %s\" % (best_model_file,))\n",
    "    model.load_weights(best_model_file)\n",
    "    pred_best = model.predict([test_word1, test_word2], batch_size=BATCH_SIZE)  # word/char switch\n",
    "    best_results.append(pd.DataFrame(pred_best, columns=[\"y_pre\"]))\n",
    "\n",
    "pd.DataFrame(pd.concat(last_results, axis=1).mean(axis=1), columns=[\"y_pre\"]).to_csv(\n",
    "    \"./result/%s-Multi_LSTM_CNN_v2_word_last.csv\" % (datetime.now().strftime(\"%Y%m%d-%H%M%S\")),  # word/char switch\n",
    "    index=False\n",
    ")\n",
    "pd.DataFrame(pd.concat(best_results, axis=1).mean(axis=1), columns=[\"y_pre\"]).to_csv(\n",
    "    \"./result/%s-Multi_LSTM_CNN_v2_word_best.csv\" % (datetime.now().strftime(\"%Y%m%d-%H%M%S\")),  # word/char switch\n",
    "    index=False\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
